Next we’ll attempt to identify malignant cells using single-cell copy number variation (CNV) estimation as implemented in the CONICSmat package. Details of the Gaussian mixture model (GMM) methodology used can be found at the Diaz Lab’s GitHub repository.
Primary Tumor
# Region table used by plotAll(); the first column of the file supplies the
# row names. Presumably chromosome-arm coordinates (per the file name).
chrom_regions <- read.table(
  "/Volumes/labs/Home/Jen Jen Yeh Lab/Jack/scRNAseq/chrom_arm_positions.txt",
  sep = "\t",
  row.names = 1,
  header = TRUE
)
gene_pos <- getGenePositions(rownames(primary))
# Scale every cell (column) of the SCT counts to a total of 10^5, then take
# log2(x + 1). The sparse counts are densified once and reused instead of
# calling as.matrix() twice; local() keeps the temporary dense copy out of the
# workspace.
cpm <- local({
  sct_counts <- as.matrix(primary@assays$SCT@counts)
  t(t(sct_counts) / colSums(sct_counts)) * 10^5
})
cpm <- log2(cpm + 1)
norm_factor <- calcNormFactors(cpm)
# NOTE(review): in the rendered output below, many regions (e.g. chr2, chr3,
# chr9) report "NOT CONVERGENT" at 1000 iterations on every repetition;
# estimates for those regions should be interpreted with caution.
# The "<fname>_BIC_LR.txt" table read later appears to be written by this call.
cnv_est <- plotAll(
  mat = cpm,
  normFactor = norm_factor,
  regions = chrom_regions,
  gene_pos = gene_pos,
  fname = "../Data/Primary"
)
## [1] "Fitting GMM for chr1 0:122026459 iteration 1"
## number of iterations= 512
## [1] "Fitting GMM for chr1 0:122026459 iteration 2"
## number of iterations= 405
## [1] "Fitting GMM for chr1 0:122026459 iteration 3"
## number of iterations= 354
## [1] "Fitting GMM for chr1 0:122026459 iteration 4"
## number of iterations= 499
## [1] 44
## [1] "Fitting GMM for chr1 124932724:248956422 iteration 1"
## number of iterations= 438
## [1] "Fitting GMM for chr1 124932724:248956422 iteration 2"
## number of iterations= 236
## [1] "Fitting GMM for chr1 124932724:248956422 iteration 3"
## number of iterations= 331
## [1] "Fitting GMM for chr1 124932724:248956422 iteration 4"
## number of iterations= 221
## [1] 44
## [1] "Fitting GMM for chr2 0:92188145 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 0:92188145 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 0:92188145 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 0:92188145 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr2 94090557:242193529 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 94090557:242193529 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 94090557:242193529 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 94090557:242193529 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr3 0:90772458 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 0:90772458 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 0:90772458 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 0:90772458 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr3 93655574:198295559 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 93655574:198295559 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 93655574:198295559 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 93655574:198295559 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr4 51743951:190214555 iteration 1"
## number of iterations= 634
## [1] "Fitting GMM for chr4 51743951:190214555 iteration 2"
## number of iterations= 371
## [1] "Fitting GMM for chr4 51743951:190214555 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr4 51743951:190214555 iteration 4"
## number of iterations= 943
## [1] 44
## [1] "Fitting GMM for chr5 50059807:181538259 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr5 50059807:181538259 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr5 50059807:181538259 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr5 50059807:181538259 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr7 61528020:159345973 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr7 61528020:159345973 iteration 2"
## number of iterations= 947
## [1] "Fitting GMM for chr7 61528020:159345973 iteration 3"
## number of iterations= 669
## [1] "Fitting GMM for chr7 61528020:159345973 iteration 4"
## number of iterations= 573
## [1] 44
## [1] "Fitting GMM for chr9 45518558:138394717 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr9 45518558:138394717 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr9 45518558:138394717 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr9 45518558:138394717 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr10 41593521:133797422 iteration 1"
## number of iterations= 761
## [1] "Fitting GMM for chr10 41593521:133797422 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr10 41593521:133797422 iteration 3"
## number of iterations= 509
## [1] "Fitting GMM for chr10 41593521:133797422 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr11 54425074:135086622 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr11 54425074:135086622 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr11 54425074:135086622 iteration 3"
## number of iterations= 848
## [1] "Fitting GMM for chr11 54425074:135086622 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr12 37185252:133275309 iteration 1"
## number of iterations= 473
## [1] "Fitting GMM for chr12 37185252:133275309 iteration 2"
## number of iterations= 674
## [1] "Fitting GMM for chr12 37185252:133275309 iteration 3"
## number of iterations= 668
## [1] "Fitting GMM for chr12 37185252:133275309 iteration 4"
## number of iterations= 561
## [1] 44
## [1] "Fitting GMM for chr14 18173523:107043718 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr14 18173523:107043718 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr14 18173523:107043718 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr14 18173523:107043718 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr15 19725254:101991189 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr15 19725254:101991189 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr15 19725254:101991189 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr15 19725254:101991189 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr17 26566633:83257441 iteration 1"
## number of iterations= 675
## [1] "Fitting GMM for chr17 26566633:83257441 iteration 2"
## number of iterations= 654
## [1] "Fitting GMM for chr17 26566633:83257441 iteration 3"
## number of iterations= 538
## [1] "Fitting GMM for chr17 26566633:83257441 iteration 4"
## number of iterations= 684
## [1] 44
## [1] "Fitting GMM for chr19 0:24498980 iteration 1"
## number of iterations= 562
## [1] "Fitting GMM for chr19 0:24498980 iteration 2"
## number of iterations= 633
## [1] "Fitting GMM for chr19 0:24498980 iteration 3"
## number of iterations= 661
## [1] "Fitting GMM for chr19 0:24498980 iteration 4"
## number of iterations= 505
## [1] 44
## [1] "Fitting GMM for chr19 27190874:58617616 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr19 27190874:58617616 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr19 27190874:58617616 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr19 27190874:58617616 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
# Per-region model-comparison table for the primary samples; check.names is
# FALSE so the column names keep their spaces and punctuation.
bic_table <- read.table(
  "../Data/Primary_BIC_LR.txt",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  check.names = FALSE
)
# Candidate regions: BIC difference above 200 and adjusted LRT p-value < 0.01.
cand_regions <- rownames(
  bic_table[with(bic_table, `BIC difference` > 200 & `LRT adj. p-val` < .01), ]
)
We use \(k = 3\) clusters, as we assume that the cells can be divided into 1) PDAC cells, 2) CAF cells, and 3) non-malignant cells. We see that chromosomes 9q, 12q, and 17q have the most estimated CNVs.
# Group the primary-tumor cells into 3 clusters using only the candidate
# regions' CNV estimates, annotated by cell type and sample.
# The result is compared against a cluster id in the next step, so it is
# presumably a named per-cell cluster assignment.
hist1 <- plotHistogram(
  cnv_est[, cand_regions],
  cpm,
  clusters = 3,
  zscoreThreshold = 3,
  celltypes = primary$cell_type,
  patients = primary$sample
)

# Cells in cluster 1 are treated as non-malignant; every other cell is
# labelled malignant.
# NOTE(review): the cluster id is hard-coded and presumably chosen by
# inspecting the histogram/heatmap above; confirm it still matches if the
# clustering is re-run.
normal_primary <- which(hist1 == 1)
primary@meta.data$CNV <- ifelse(
  rownames(primary@meta.data) %in% names(normal_primary),
  "Normal",
  "Malignant"
)
# UMAP coloured by CNV status; the legend, title and axis titles are removed
# here (shared labels are added when the panels are combined later).
p12 <- DimPlot(primary, group.by = "CNV") +
  labs(subtitle = "Primary Tumor") +
  theme_yehlab() +
  theme(
    plot.title = element_blank(),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title = element_blank(),
    legend.position = "none"
  )
Metastatic Tumor
We repeat the process for the metastatic tumor samples.
gene_pos <- getGenePositions(rownames(meta))
# Scale every cell (column) of the SCT counts to a total of 10^5, then take
# log2(x + 1). The sparse counts are densified once and reused instead of
# calling as.matrix() twice; local() keeps the temporary dense copy out of the
# workspace.
cpm <- local({
  sct_counts <- as.matrix(meta@assays$SCT@counts)
  t(t(sct_counts) / colSums(sct_counts)) * 10^5
})
cpm <- log2(cpm + 1)
norm_factor <- calcNormFactors(cpm)
# NOTE(review): in the rendered output below, several regions (e.g. chr2q,
# chr3q, chr6p, chr9q) report "NOT CONVERGENT" at 1000 iterations on every
# repetition; estimates for those regions should be interpreted with caution.
# The "<fname>_BIC_LR.txt" table read later appears to be written by this call.
cnv_est <- plotAll(
  mat = cpm,
  normFactor = norm_factor,
  regions = chrom_regions,
  gene_pos = gene_pos,
  fname = "../Data/Meta"
)
## [1] "Fitting GMM for chr1 0:122026459 iteration 1"
## number of iterations= 526
## [1] "Fitting GMM for chr1 0:122026459 iteration 2"
## number of iterations= 504
## [1] "Fitting GMM for chr1 0:122026459 iteration 3"
## number of iterations= 487
## [1] "Fitting GMM for chr1 0:122026459 iteration 4"
## number of iterations= 283
## [1] 44
## [1] "Fitting GMM for chr1 124932724:248956422 iteration 1"
## number of iterations= 413
## [1] "Fitting GMM for chr1 124932724:248956422 iteration 2"
## number of iterations= 563
## [1] "Fitting GMM for chr1 124932724:248956422 iteration 3"
## number of iterations= 304
## [1] "Fitting GMM for chr1 124932724:248956422 iteration 4"
## number of iterations= 538
## [1] 44
## [1] "Fitting GMM for chr2 0:92188145 iteration 1"
## number of iterations= 807
## [1] "Fitting GMM for chr2 0:92188145 iteration 2"
## number of iterations= 660
## [1] "Fitting GMM for chr2 0:92188145 iteration 3"
## number of iterations= 839
## [1] "Fitting GMM for chr2 0:92188145 iteration 4"
## number of iterations= 651
## [1] 44
## [1] "Fitting GMM for chr2 94090557:242193529 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 94090557:242193529 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 94090557:242193529 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr2 94090557:242193529 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr3 93655574:198295559 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 93655574:198295559 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 93655574:198295559 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr3 93655574:198295559 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr4 51743951:190214555 iteration 1"
## number of iterations= 959
## [1] "Fitting GMM for chr4 51743951:190214555 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr4 51743951:190214555 iteration 3"
## number of iterations= 997
## [1] "Fitting GMM for chr4 51743951:190214555 iteration 4"
## number of iterations= 718
## [1] 44
## [1] "Fitting GMM for chr5 50059807:181538259 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr5 50059807:181538259 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr5 50059807:181538259 iteration 3"
## number of iterations= 837
## [1] "Fitting GMM for chr5 50059807:181538259 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr6 0:58553888 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr6 0:58553888 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr6 0:58553888 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr6 0:58553888 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr7 61528020:159345973 iteration 1"
## number of iterations= 657
## [1] "Fitting GMM for chr7 61528020:159345973 iteration 2"
## number of iterations= 359
## [1] "Fitting GMM for chr7 61528020:159345973 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr7 61528020:159345973 iteration 4"
## number of iterations= 715
## [1] 44
## [1] "Fitting GMM for chr9 45518558:138394717 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr9 45518558:138394717 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr9 45518558:138394717 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr9 45518558:138394717 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
## [1] "Fitting GMM for chr10 41593521:133797422 iteration 1"
## number of iterations= 526
## [1] "Fitting GMM for chr10 41593521:133797422 iteration 2"
## number of iterations= 274
## [1] "Fitting GMM for chr10 41593521:133797422 iteration 3"
## number of iterations= 557
## [1] "Fitting GMM for chr10 41593521:133797422 iteration 4"
## number of iterations= 372
## [1] 44
## [1] "Fitting GMM for chr11 54425074:135086622 iteration 1"
## number of iterations= 651
## [1] "Fitting GMM for chr11 54425074:135086622 iteration 2"
## number of iterations= 441
## [1] "Fitting GMM for chr11 54425074:135086622 iteration 3"
## number of iterations= 788
## [1] "Fitting GMM for chr11 54425074:135086622 iteration 4"
## number of iterations= 487
## [1] 44
## [1] "Fitting GMM for chr12 37185252:133275309 iteration 1"
## number of iterations= 133
## [1] "Fitting GMM for chr12 37185252:133275309 iteration 2"
## number of iterations= 275
## [1] "Fitting GMM for chr12 37185252:133275309 iteration 3"
## number of iterations= 148
## [1] "Fitting GMM for chr12 37185252:133275309 iteration 4"
## number of iterations= 286
## [1] 44
## [1] "Fitting GMM for chr14 18173523:107043718 iteration 1"
## number of iterations= 788
## [1] "Fitting GMM for chr14 18173523:107043718 iteration 2"
## number of iterations= 385
## [1] "Fitting GMM for chr14 18173523:107043718 iteration 3"
## number of iterations= 531
## [1] "Fitting GMM for chr14 18173523:107043718 iteration 4"
## number of iterations= 456
## [1] 44
## [1] "Fitting GMM for chr15 19725254:101991189 iteration 1"
## number of iterations= 485
## [1] "Fitting GMM for chr15 19725254:101991189 iteration 2"
## number of iterations= 351
## [1] "Fitting GMM for chr15 19725254:101991189 iteration 3"
## number of iterations= 267
## [1] "Fitting GMM for chr15 19725254:101991189 iteration 4"
## number of iterations= 322
## [1] 44
## [1] "Fitting GMM for chr17 26566633:83257441 iteration 1"
## number of iterations= 350
## [1] "Fitting GMM for chr17 26566633:83257441 iteration 2"
## number of iterations= 282
## [1] "Fitting GMM for chr17 26566633:83257441 iteration 3"
## number of iterations= 379
## [1] "Fitting GMM for chr17 26566633:83257441 iteration 4"
## number of iterations= 341
## [1] 44
## [1] "Fitting GMM for chr19 0:24498980 iteration 1"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr19 0:24498980 iteration 2"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr19 0:24498980 iteration 3"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] "Fitting GMM for chr19 0:24498980 iteration 4"
## number of iterations= 220
## [1] 44
## [1] "Fitting GMM for chr19 27190874:58617616 iteration 1"
## number of iterations= 423
## [1] "Fitting GMM for chr19 27190874:58617616 iteration 2"
## number of iterations= 429
## [1] "Fitting GMM for chr19 27190874:58617616 iteration 3"
## number of iterations= 470
## [1] "Fitting GMM for chr19 27190874:58617616 iteration 4"
## WARNING! NOT CONVERGENT!
## number of iterations= 1000
## [1] 44
# Per-region model-comparison table for the metastatic samples; check.names is
# FALSE so the column names keep their spaces and punctuation.
bic_table <- read.table(
  "../Data/Meta_BIC_LR.txt",
  header = TRUE,
  sep = "\t",
  row.names = 1,
  check.names = FALSE
)
# Candidate regions: BIC difference above 200 and adjusted LRT p-value < 0.01.
cand_regions <- rownames(
  bic_table[with(bic_table, `BIC difference` > 200 & `LRT adj. p-val` < .01), ]
)
We see that chromosomes 2p, 7q, 12q, and 15q have the most estimated CNVs for the metastatic samples.
# Group the metastatic cells into 3 clusters using only the candidate regions'
# CNV estimates, annotated by cell type and sample.
# The result is compared against a cluster id in the next step, so it is
# presumably a named per-cell cluster assignment.
hist2 <- plotHistogram(
  cnv_est[, cand_regions],
  cpm,
  clusters = 3,
  zscoreThreshold = 3,
  celltypes = meta$cell_type,
  patients = meta$sample
)

# Cells in cluster 3 are treated as non-malignant; every other cell is
# labelled malignant.
# NOTE(review): the cluster id is hard-coded (and differs from the one used
# for the primary samples), presumably chosen by inspecting the
# histogram/heatmap above; confirm it still matches if the clustering is
# re-run.
normal_meta <- which(hist2 == 3)
meta@meta.data$CNV <- ifelse(
  rownames(meta@meta.data) %in% names(normal_meta),
  "Normal",
  "Malignant"
)
# UMAP coloured by CNV status; the legend, title and axis titles are removed
# here (shared labels are added when the panels are combined later).
p13 <- DimPlot(meta, group.by = "CNV") +
  labs(subtitle = "Metastatic Tumor") +
  theme_yehlab() +
  theme(
    plot.title = element_blank(),
    plot.subtitle = element_text(hjust = 0.5),
    axis.title = element_blank(),
    legend.position = "none"
  )
Let’s check out the results! We see that the cells estimated to be malignant are mostly located in the Epithelial clusters. This is what we’d expect from pancreatic ductal adenocarcinoma (PDAC). There are also a fair number of malignant cells in the MSC cluster we identified. Notably, we don’t see a significant concentration of malignant cells in the Fibroblast cluster from the primary samples. This makes me doubt the authors’ annotation of those cells as cancer-associated fibroblasts (CAFs).
# Place the primary and metastatic UMAPs side by side and add the shared
# title and axis labels.
# NOTE(review): rot = 360 is a full turn, i.e. the left label is drawn
# unrotated; confirm that a horizontal "UMAP 2" label is intended.
annotate_figure(
  ggarrange(p12, p13, ncol = 2),
  top = "Malignant Cells Estimated Through CONICSmat",
  bottom = "UMAP 1",
  left = text_grob("UMAP 2", rot = 360)
)
